require(mltest)

#### confusion matrix (last three sentences) ####
data <- read.csv("C1_main_data.csv")

# Table A.1
# Cell [i, j] is the column sum of `<row code>_<column code>_3s` in `data`,
# with party codes K, L, D, C standing for Komeito, LDP, DPJ, JCP.
# Both dimensions are labelled explicitly: rbind() on its own takes the column
# names from the first row vector only ("K_K_3s", "K_L_3s", ...), which
# mislabels the cells of every other row in the printed table.
confusion.matrix.3s <- local({
  party.codes <- c("K", "L", "D", "C")
  party.names <- c("Komeito", "LDP", "DPJ", "JCP")
  count.rows <- lapply(party.codes, function(row.code) {
    colSums(data[, paste0(row.code, "_", party.codes, "_3s")])
  })
  counts <- do.call(rbind, count.rows)
  dimnames(counts) <- list(party.names, party.names)
  # append a "Sum" row and a "Sum" column (index 5 in each dimension)
  addmargins(counts)
})
confusion.matrix.3s
# row percentages: the Sum column is dropped, so every row (including the
# Sum row) adds up to 100 across the four party columns
round(prop.table(confusion.matrix.3s[, -5], margin = 1) * 100, 1)
# column percentages: the Sum row is dropped, so every column (including the
# Sum column) adds up to 100 across the four party rows
round(prop.table(confusion.matrix.3s[-5, ], margin = 2) * 100, 1)

# Expand the confusion matrix back into one pair of labels per counted case.
# Column 1 repeats each cell's column party by the cell count; column 2
# repeats each row's party by the row total (column 5, the "Sum" margin).
# Further down, column 1 is passed to ml_test() as its first argument and
# column 2 as its second.
predicted.values.3s <- local({
  party.levels <- c("K", "L", "D", "C")
  # as.vector(t(.)) walks the 4 x 4 cells row by row, which lines up with the
  # K, L, D, C label cycle repeated once per row
  cell.counts <- as.vector(t(confusion.matrix.3s[1:4, 1:4]))
  row.totals <- confusion.matrix.3s[1:4, 5]
  data.frame(
    predicted = factor(rep(rep(party.levels, times = 4), times = cell.counts),
                       levels = party.levels),
    actual = factor(rep(party.levels, times = row.totals),
                    levels = party.levels)
  )
})

# Evaluate the classifier once and reuse the result for every metric below
# (the inputs are identical for all three, so there is no need to recompute).
ml.results.3s <- ml_test(predicted.values.3s[, 1], predicted.values.3s[, 2])

# accuracy
round(ml.results.3s$accuracy, 3)

# F1 score (unweighted mean of the per-class F1 values)
round(mean(ml.results.3s$F1), 3)

# recall rate for every party, in factor-level order K, L, D, C;
# the Komeito recall is the first value
round(ml.results.3s$recall, 3)

#### feature importance ####
feature.importance <- read.csv("C2_feature_importance.csv")

# Table A.2
# Per feature: name, rounded importance, the values of columns 3 to 6, their
# row total, and each of those columns as a share of the row total.
# NOTE(review): columns 3 to 6 are presumably one count column per party --
# confirm against the CSV header.
feature.data <- local({
  value.columns <- feature.importance[, 3:6]
  row.totals <- rowSums(value.columns)
  row.shares <- round(value.columns / row.totals, 2)
  # check.names = FALSE keeps the repeated column names for values and shares,
  # exactly as cbind() on data frames does
  data.frame(feature = feature.importance$feature,
             importance = round(feature.importance$importance),
             value.columns,
             total = row.totals,
             row.shares,
             check.names = FALSE)
})
# first 20 rows among the features whose row total exceeds 4999
head(subset(feature.data, total > 4999), n = 20)
# column totals, their shares of the grand total, and the grand total itself
colSums(feature.importance[, 3:6])
round(colSums(feature.importance[, 3:6]) / sum(feature.importance[, 3:6]), 2)
sum(feature.importance[, 3:6])

#### confusion matrix (last two sentences) ####
# Cell [i, j] is the column sum of `<row code>_<column code>_2s` in `data`
# for party codes K, L, D, C; addmargins() then appends a "Sum" row and column.
confusion.matrix.2s <- local({
  party.codes <- c("K", "L", "D", "C")
  count.rows <- lapply(party.codes, function(row.code) {
    colSums(data[, paste0(row.code, "_", party.codes, "_2s")])
  })
  addmargins(do.call(rbind, count.rows))
})
# Expand the confusion matrix back into one pair of labels per counted case.
# Column 1 repeats each cell's column party by the cell count; column 2
# repeats each row's party by the row total (column 5, the "Sum" margin).
# Further down, column 1 is passed to ml_test() as its first argument and
# column 2 as its second.
predicted.values.2s <- local({
  party.levels <- c("K", "L", "D", "C")
  # as.vector(t(.)) walks the 4 x 4 cells row by row, which lines up with the
  # K, L, D, C label cycle repeated once per row
  cell.counts <- as.vector(t(confusion.matrix.2s[1:4, 1:4]))
  row.totals <- confusion.matrix.2s[1:4, 5]
  data.frame(
    predicted = factor(rep(rep(party.levels, times = 4), times = cell.counts),
                       levels = party.levels),
    actual = factor(rep(party.levels, times = row.totals),
                    levels = party.levels)
  )
})

# Evaluate the classifier once and reuse the result for every metric below
# (the inputs are identical for all three, so there is no need to recompute).
ml.results.2s <- ml_test(predicted.values.2s[, 1], predicted.values.2s[, 2])

# accuracy
round(ml.results.2s$accuracy, 3)

# F1 score (unweighted mean of the per-class F1 values)
round(mean(ml.results.2s$F1), 3)

# recall rate for every party, in factor-level order K, L, D, C;
# the Komeito recall is the first value
round(ml.results.2s$recall, 3)

#### confusion matrix (last four sentences) ####
# Cell [i, j] is the column sum of `<row code>_<column code>_4s` in `data`
# for party codes K, L, D, C; addmargins() then appends a "Sum" row and column.
confusion.matrix.4s <- local({
  party.codes <- c("K", "L", "D", "C")
  count.rows <- lapply(party.codes, function(row.code) {
    colSums(data[, paste0(row.code, "_", party.codes, "_4s")])
  })
  addmargins(do.call(rbind, count.rows))
})
# Expand the confusion matrix back into one pair of labels per counted case.
# Column 1 repeats each cell's column party by the cell count; column 2
# repeats each row's party by the row total (column 5, the "Sum" margin).
# Further down, column 1 is passed to ml_test() as its first argument and
# column 2 as its second.
predicted.values.4s <- local({
  party.levels <- c("K", "L", "D", "C")
  # as.vector(t(.)) walks the 4 x 4 cells row by row, which lines up with the
  # K, L, D, C label cycle repeated once per row
  cell.counts <- as.vector(t(confusion.matrix.4s[1:4, 1:4]))
  row.totals <- confusion.matrix.4s[1:4, 5]
  data.frame(
    predicted = factor(rep(rep(party.levels, times = 4), times = cell.counts),
                       levels = party.levels),
    actual = factor(rep(party.levels, times = row.totals),
                    levels = party.levels)
  )
})

# Evaluate the classifier once and reuse the result for every metric below
# (the inputs are identical for all three, so there is no need to recompute).
ml.results.4s <- ml_test(predicted.values.4s[, 1], predicted.values.4s[, 2])

# accuracy
round(ml.results.4s$accuracy, 3)

# F1 score (unweighted mean of the per-class F1 values)
round(mean(ml.results.4s$F1), 3)

# recall rate for every party, in factor-level order K, L, D, C;
# the Komeito recall is the first value
round(ml.results.4s$recall, 3)